
*Sequence:
*1: Master_engel_lpoly_v1_bootstrap.do (This file, needs marco to make clean inputs and FO correction)
*Calls:	
*engel_lpoly_loop_v1_bs.do and engel_lpoly_analyze_v1_bs.do to run and analyze engle curves, 
*first_order_correction_v1.do and exact_correction_v1.do for FO and exact corrections,
*figures_v1.do to make Figures 2-5 runs over mang G groups, provides inputs for Figure 6


* inputs needed:
*raw hh data "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural.dta"
*market characteristic "$dropbox/replication_files/data/intermediate_data/R43R55/market_characteristics_43_55_rural.dta"
*item codes "$dropbox/replication_files/data/intermediate_data/hh_shares/item_codes`DMI'.dta"
*cpi "$output/CPI_district_X_decile_level_V4"
*bootstraps of CPI  $dropbox/replication_files/data/intermediate_data/conventional_price_indices/bootstrap/CPI_district_X_decile_level_V4_bs`nn'
*price changes: "$dropbox/replication_files/data/intermediate_data/conventional_price_indices/d_ln_p_goods_i_55_43.dta"



clear all
set more off
set matsize 11000


set seed 277491  // first run: 9509732   from random.org

local gtools "g"  // code uses gtools. if you don't have gtools add "local gtools """ to your else if


*---------------------------------folders-------------------
if "`c(username)'"=="da334" | "`c(username)'"=="David" | "`c(username)'"=="Atkin" | "`c(username)'"=="dga" | "`c(username)'"=="atkin" {
global dump "C:/Scratch"
global dropbox "C:/Work/Engel_GFT"
global stataloc "C:/Dropbox/Stata15/StataMP-64"
global codeloc "$dropbox/replication_files/do_files"
global output "$dropbox/replication_files/data/intermediate_data/conventional_price_indices"
}
if "`c(username)'"=="dga" {
global dump "G:/Scratch" 
}
*-------------------------------------------------------------------

local foldno "1" // name of folder where everthing gets saved
local bstotal "1000"  // total number of boostraps


cap mkdir "${dump}/Engel`foldno'/"  // this is just a scratch folder for temp files
capture mkdir "$dropbox/replication_files/data/intermediate_data/Engel_curves/
capture mkdir "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/" // this is where dtas go at end


global share_spec "rshare"   // this is the type of shares in that file that we use
global band_spec "g25"  // this is bandwidth (formed in engel_lpoly_loop_v1_bs.do)





foreach bs of numlist 0 1/`bstotal'  {  //     0 means no bootstrap, then have as many bootstraps as you want (50 here).
local zzz "15"  // this is the G grouping, In figure 6 we show 107 other G groupings.
global Gi_spec "V1_DM_G108_`zzz'"   // This is the hh file we use, this calls "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_50_55${Gi_spec}_rural.dta" 





*This pulls the number of markets from the data do this
use "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural.dta"  , clear  
sum market_id
local nmarket=r(max)



*this is where I draw a boostrap sample. if bs=0 this is the non bootstrapped sample
if "`bs'"!="0" {
bsample  , strata(market_year_id)  
save "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural_bs`bs'.dta", replace
}
if "`bs'"=="0" {
save "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural_bs`bs'.dta", replace
}


keep ${share_spec}_*
local prodcount=c(k)






******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 1: Run loops over each market estimating Engel curves and then processing them

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************




local instances=9  // number of stata windows run simultaneously. Lower this number if you are running into bottlenecks
local blocksize=round(`nmarket'/`instances')



local instcount=0 

****************************************
*1a) estimate relative engel curves
****************************************



forval xstart=1(`blocksize')`nmarket' { 
local xfinish=`xstart'+`blocksize'-1
local instcount=`instcount'+1
winexec "${stataloc}" do "${codeloc}/engel_lpoly_loop_V`foldno'_bs.do" `xstart' `xfinish' ${Gi_spec} ${share_spec} ${band_spec} "${dump}" "${dropbox}" `instcount' `foldno' `bs'
sleep 10000
}

*exit, clear STATA
local counter=0
while `counter'<`instances' {

local counter=0
forval checker=1/`instances' {
	capture confirm file "${dump}/Engel`foldno'/InstanceA_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
		if _rc==0 {
		local counter=`counter'+1
		}
	}
sleep 10000
}



forval checker=1/`instances' {
erase "${dump}/Engel`foldno'/InstanceA_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
}
local checker1=`instances'+1
cap erase "${dump}/Engel`foldno'/InstanceA_${band_spec}_${share_spec}_${Gi_spec}_`checker1'_bs`bs'.dta"


erase "$dropbox/replication_files/data/intermediate_data/hh_shares/hh_shares_43_55${Gi_spec}_rural_bs`bs'.dta"





****************************************
*1b) now take engel curves and calaculate P0, P1, welfare etc
****************************************


local instcount=0 

forval xstart=1(`blocksize')`nmarket' { 
local xfinish=`xstart'+`blocksize'-1
local instcount=`instcount'+1
winexec "${stataloc}" do "${codeloc}/engel_lpoly_analyze_V`foldno'_bs.do" `xstart' `xfinish' ${Gi_spec} ${share_spec} ${band_spec}  "${dump}" "${dropbox}" `instcount'  `foldno' `bs'
sleep 10000
}


local counter=0
while `counter'<`instances' {

local counter=0
forval checker=1/`instances' {
	capture confirm file "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
		if _rc==0 {
		local counter=`counter'+1
		}
	}
sleep 10000
}

forval checker=1/`instances' {
erase "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker'_bs`bs'.dta"
}
local checker1=`instances'+1
cap erase "${dump}/Engel`foldno'/InstanceB_${band_spec}_${share_spec}_${Gi_spec}_`checker1'_bs`bs'.dta"








******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 2: Combine Markets and Create Market-Decile and National-Decile Averages

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************



****************************************
**2a) First combine results to make dataset 
****************************************





foreach band in   $band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  { 




local shareshort=substr("`share'",1,3)



clear
forval market=1/`nmarket' {
capture confirm file "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
if _rc==0 {
use "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", clear
cap drop Z* 

cap renvars S*, postsub(x )


cap drop r??predict_lnmpcew 
cap drop r??round 
cap drop r??market_year_id


save "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", replace
erase "${dump}/Engel`foldno'/Engel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
*to clean up the individual engel curves delete except for bs=0 that need to adjust
if "`bs'"!="0" {
erase "${dump}/Engel`foldno'/Engel_`band'_`share'_`DMI'_`market'_bs`bs'.dta"  
}

}
}


clear
forval market=1/`nmarket' {
noi di "append `market'"
cap append using "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta", force
cap erase "${dump}/Engel`foldno'/XEngel_rindexes_`band'_`share'_`DMI'_`market'_bs`bs'.dta"
}



egen id=group(market_id percentile)

renvars id market_id sector state43 district43 r43count_hh r43total_wt percentile r55count_hh r55total_wt  avg_count_hh, prefix(X)
cap renvars  r50count_hh r50total_wt , prefix(X)
save "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace




use W* X*  using  "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear
renvars X*, presub(X)

local stublist "" 
	qui ds *_p50
	local stubs "`r(varlist)'"
	tokenize `stubs'
	forval n=1/20000 {
	local `n'=regexr("``n''","_p50$","")
	local stublist `stublist' ``n''
	}

noi di "reshape"


reshape long  `stublist' , i(id) j(item_decile) string
gen decile=regexr(item_decile,"_p","")
destring decile, replace
keep W* market_id decile
egen tag=tag(market_id decile)
keep if tag==1
drop tag
save "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta", replace


forval g=1/`prodcount' {
use *`shareshort'*_`g'_* X*  using  "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear

renvars X*, presub(X)

local stublist "" 
	qui ds *_`g'_p50
	local stubs "`r(varlist)'"
	tokenize `stubs'
	forval n=1/20000 {
	local `n'=regexr("``n''","_`g'_p50$","")
	local stublist `stublist' ``n''
	}

noi di "reshape"


reshape long  `stublist' , i(id) j(item_decile) string

gen decile=regexr(item_decile,".*_p","")
destring decile, replace
gen item_id=regexr(item_decile,"_p.*","")
replace item_id=regexr(item_id,"^_","")
destring item_id, replace


drop id 

save "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta", replace
}

clear
forval g=1/`prodcount' {
append using "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta"
erase "${dump}/Engel`foldno'/XEngel_all_rindexes_g`g'_`band'_`share'_`DMI'_bs`bs'.dta"
}

merge m:1 market_id decile using "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta", nogenerate

save "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace   
erase "${dump}/Engel`foldno'/XEngel_wide_rindexes_`band'_`share'_`DMI'_bs`bs'.dta"
erase "${dump}/Engel`foldno'/XEngel_all_rindexes_W_`band'_`share'_`DMI'_bs`bs'.dta"

}
}
}








****************************************
**2b) Now flag non monotonic curves etc.
****************************************



foreach band in   $band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  { 


local shareshort=substr("`share'",1,3)


use "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", clear 

erase "${dump}/Engel`foldno'/XEngel_all_rindexes_`band'_`share'_`DMI'_bs`bs'.dta"

local starbwlist "" 
qui ds P4355lp_*_`shareshort'*
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","^P4355lp_","P*")
local starbwlist `starbwlist' ``n''
}



*exclude non monotonic curves (make this an extra variant)
sort market_id decile item_id percentile
foreach var of varlist `starbwlist'  { 
local namer=regexr("`var'","_`shareshort'","m_`shareshort'")
gen `namer'=`var'  
replace `namer'=. if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="0"  // replace value=. if not monotonic
replace `namer'=98 if percentile>9999 & percentile<=99999 & `namer'==. & substr(string(`var'[_n+3]),-1,1)=="0" // reason not monotonic
if regexm("`var'","^P")==1 {
gen m`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)!="0"  // indicator for monotonicity
}
}


*exclude non monotonic curves when both rounds are monotonic (make this an extra variant)
sort market_id decile item_id percentile
foreach var of varlist `starbwlist'  {  
local namer=regexr("`var'","_`shareshort'","n_`shareshort'")
gen `namer'=`var' if percentile>9999 
replace `namer'=`var' if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="1"  &  substr(string(`var'[_n+6]),-4,1)=="1"  // replace value=. if not monotonic
replace `namer'=`var' if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="2"  &  substr(string(`var'[_n+6]),-4,1)=="2"  // replace value=. if not monotonic
replace `namer'=98 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="0" | substr(string(`var'[_n+3]),-4,1)=="0") // reason
replace `namer'=97 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="1" & substr(string(`var'[_n+3]),-4,1)=="2") // reason
replace `namer'=97 if percentile>9999 & percentile<=99999 & `namer'==. & (substr(string(`var'[_n+3]),-1,1)=="2" & substr(string(`var'[_n+3]),-4,1)=="1") // reason

if regexm("`var'","^P")==1 {
gen n`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="1"  &  substr(string(`var'[_n+6]),-4,1)=="1"  // indicator for monotonicity
replace  n`namer'=1 if percentile<=9999 &  substr(string(`var'[_n+6]),-1,1)=="2"  &  substr(string(`var'[_n+6]),-4,1)=="2" 
}

}

egen tag_md=tag(percentile decile market_id)


cap merge m:1 item_id using "$dropbox/replication_files/data/intermediate_data/hh_shares/item_codes`DMI'.dta", keepusing(supergroup*) nogenerate

compress

save "${dump}/Engel`foldno'/temp_rindexes_`band'_`share'_`DMI'_bs`bs'.dta", replace


keep if percentile==9901
compress
save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/temp_rindexes_`band'_`share'_`DMI'_destslope_4orthog.dta", replace





}
}
}



if "`bs'"=="0" {
*now we calculate the first order approximation. this pulls slope coefs from "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/temp_rindexes_`band'_`share'_`DMI'_destslope_4orthog.dta" files above
winexec "${stataloc}" do "${codeloc}/first_order_correction_v`foldno'.do"

sleep 600000
*give a little time for this to run

}








****************************************
*2c) Now calculate averages at the market_decile level
****************************************



qui {


foreach band in   wbw$band_spec       {  
foreach share in  $share_spec  { 
foreach DMI in  $Gi_spec  {


local xshare "`share'"


use "${dump}/Engel`foldno'/temp_rindexes_${band_spec}_`xshare'_`DMI'_bs`bs'.dta", clear

erase "${dump}/Engel`foldno'/temp_rindexes_${band_spec}_`xshare'_`DMI'_bs`bs'.dta"






*clean up and keep the cen specification
renvars  *`band'cen_*  , prefix(x)  
drop P*`band'* 

drop T*`band'*
cap drop n*`band'* 
cap drop m*`band'*
renvars x*`band'*, presub("x")




local bwlist "" 
qui ds  P4355lp_`band'*_`xshare' // 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","_`xshare'$","")
local `n'=regexr("``n''","^P4355lp_","")
local bwlist `bwlist' ``n''
}


local xshareshort=substr("`xshare'",1,3)
local xsharelist "" 
local firstbw=word("`bwlist'",-1)


qui ds  nP4355lp_`firstbw'_`xshareshort'* // 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n2=1/20000 {
local `n2'=regexr("``n2''","^nP4355lp_`firstbw'_","")
local xsharelist `xsharelist' ``n2''
}






sort market_id decile item_id percentile
`gtools'egen group=group(decile market_id)  
sum group
local groupmax=r(max)

gen r4355total_wt=r43total_wt + r55total_wt
gen r5543total_wt=r43total_wt + r55total_wt

cap gen r4350total_wt=r43total_wt + r50total_wt
cap gen r5043total_wt=r43total_wt + r50total_wt

cap gen r5550total_wt=r55total_wt + r50total_wt
cap gen r5055total_wt=r55total_wt + r50total_wt

cap gen r555043total_wt=r55total_wt + r50total_wt + r43total_wt 
cap gen r435055total_wt=r55total_wt + r50total_wt + r43total_wt 


noi di "`xsharelist'"

foreach xsh in   `xsharelist'  {  




preserve



foreach xxxsh in   `xsharelist'  {
if "`xxxsh'"!="`xsh'" {
drop *`xxxsh'*
}
}


noi di "making Good_by_good_`xsh'_`band'_v4_`DMI'.dta"

noi di "`bwlist'"

foreach bw in   `bwlist'  {  




noi di "`bw'"




foreach tip in 4355 5543   { 

local nout=regexr("S`tip'lp_`bw'_`xsh'","n_`xsh'","_`xsh'")
cap gen S`tip'lp_`bw'_`xsh'=`nout'

`gtools'egen P`tip'lp_n`bw'_`xsh'=mean(P`tip'lp_`bw'_`xsh') if percentile<=9999,by(percentile decile market_id)

`gtools'egen P`tip'lp_m`bw'_`xsh'=median(P`tip'lp_`bw'_`xsh') if percentile<=9999,by(percentile decile market_id)



**********so now we have the averages across goods for each market


*fix reason for miss entries for n and m that should be missing (when non monotonic)
if regexm("`bw'","m$")==1  {
  replace P`tip'lp_`bw'_`xsh'=.  if percentile>9999 & percentile<=99999   &   mP`tip'lp_`bw'_`xsh'[_n-3]!=1  
}
if regexm("`bw'","n$")==1 {
  replace P`tip'lp_`bw'_`xsh'=.  if percentile>9999 & percentile<=99999   &   nP`tip'lp_`bw'_`xsh'[_n-3]!=1  
}

*this counts the number of non missings (or missings if on percentile>9999
`gtools'egen oP`tip'lp_`bw'_`xsh'=count(P`tip'lp_`bw'_`xsh'),by(percentile decile market_id)



if regexm("`bw'","m$")==1 {
 `gtools'egen xP`tip'lp_`bw'_`xsh'=count(mP`tip'lp_`bw'_`xsh'),by(percentile decile market_id) // just count of monotonic
 gen omP`tip'lp_`bw'_`xsh'=oP`tip'lp_`bw'_`xsh'/xP`tip'lp_`bw'_`xsh'

 gen temp=P`tip'lp_`bw'_`xsh'[_n+3]  if percentile<=9999    &   mP`tip'lp_`bw'_`xsh'==1  
 replace temp=. if temp==99 // if origin is non monotonic we get 99
 `gtools'egen amP`tip'lp_`bw'_`xsh'=mean(temp), by(percentile decile market_id) // degree of overlap above/below
 drop temp
}

if regexm("`bw'","n$")==1 {
 `gtools'egen xP`tip'lp_`bw'_`xsh'=count(nP`tip'lp_`bw'_`xsh'),by(percentile decile market_id) // just count of monotonic
 gen onP`tip'lp_`bw'_`xsh'=oP`tip'lp_`bw'_`xsh'/xP`tip'lp_`bw'_`xsh'

 *use reasons for no overlap when monotonic (above or below)
 gen temp=P`tip'lp_`bw'_`xsh'[_n+3]  if percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  
 replace temp=. if temp==99 // these should not exist
 `gtools'egen anP`tip'lp_`bw'_`xsh'=mean(temp), by(percentile decile market_id) // degree of overlap above/below
 drop temp
 
 
 ********************************************************************
 ****bias correction in this clean case (when know if above or below)
 
 
  
 `gtools'egen xmin=min(P`tip'lp_`bw'_`xsh'), by(percentile decile market_id)
 `gtools'egen xmax=max(P`tip'lp_`bw'_`xsh'), by(percentile decile market_id)
 
  gen xrank=P`tip'lp_`bw'_`xsh' if percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1 
  *now replace missings with large or small value depending which end it is at
  replace xrank=xmin-0.000001 if P`tip'lp_`bw'_`xsh'[_n+3]==1 & percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  & P`tip'lp_`bw'_`xsh'==.
  * start going forward: 43-->55. so what is going on here is if we have +1 in [_n+3] it means we are too poor to hit. that means the missing values have the smallest price index changes (and the price index changes we do observe are too large)
  * so give the lowest possible value -999999 
  replace xrank=xmax+0.000001 if P`tip'lp_`bw'_`xsh'[_n+3]==-1 & percentile<=9999    &   nP`tip'lp_`bw'_`xsh'==1  & P`tip'lp_`bw'_`xsh'==.
  *similarly if we have -1 we are too rich to hit, then the observed price changes are smaller than when we miss so want a large possible value. 
  *everything is flipped if going 55-->43 but since price index is negative in that case code is same.
   
 `gtools'egen xmed=median(xrank),by(percentile decile market_id)
 *so now take the median which is estimate of mean of syymetric distribution
 
  gen P`tip'lp_z`bw'_`xsh'=xmed if xmed<=(xmax) & xmed>=(xmin)
  *so this replaces with missing if don't actually observe median
  
 
   *if observe missings at one end or the other, use (min-max)/onP as range and min or max as estimate of end.
   gen P`tip'lp_x`bw'_`xsh'=P`tip'lp_z`bw'_`xsh'
   replace P`tip'lp_x`bw'_`xsh'=xmin+(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_x`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==-1 // this is median if observe min
   replace P`tip'lp_x`bw'_`xsh'=xmax-(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_x`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==1 // this is median if observe min
   *this probably deals with most cases. rare when have missing either end and don't estimate median
  
  
   gen P`tip'lp_y`bw'_`xsh'=xmed if xmed<=(xmax+0.0000006) & xmed>=(xmin-0.0000006)
   *now replace median with max/min when even number and max/min in middle (stata's median takes average)
  
   *if observe missings at one end or the other, use (min-max)/onP as range and min or max as estimate of end.
   gen P`tip'lp_w`bw'_`xsh'=P`tip'lp_y`bw'_`xsh'
   replace P`tip'lp_w`bw'_`xsh'=xmin+(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_w`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==-1 // this is median if observe min
   replace P`tip'lp_w`bw'_`xsh'=xmax-(xmax-xmin)/(onP`tip'lp_`bw'_`xsh'*2) if  P`tip'lp_w`bw'_`xsh'==. & anP`tip'lp_`bw'_`xsh'==1 // this is median if observe min
   *this probably deals with most cases. rare when have missing either end and don't estimate median
 
 
 
 * Sarhan correction when missing median
 gen  xwt=1 if xrank!=.
 gen xwt_x1=xwt
 replace xwt_x1=0 if (xrank>=xmin)  & xrank!=.
 gen xwt_x2=xwt
 replace xwt_x2=0 if (xrank<xmin |  xrank>xmax) & xrank!=.
 gen xwt_x3=xwt
 replace xwt_x3=0 if (xrank<=xmax) & xrank!=.
 
  `gtools'egen xsum1=total( xwt_x1) , by(percentile decile market_id)
  `gtools'egen xsum2=total( xwt_x2) , by(percentile decile market_id)
  `gtools'egen xsum3=total( xwt_x3) , by(percentile decile market_id)
 *this is from  ESTIMATION OF THE MEAN AND STANDARD DEVIATION BY ORDER STATISTICS. PART III, A. E. SARHAN. 
 gen P`tip'lp_u`bw'_`xsh'=0.5*(1/(xsum1+xsum2+xsum3-xsum1-xsum3-1))*((xsum1+xsum2+xsum3-2*xsum3-1)*xmin + (xsum1+xsum2+xsum3-2*xsum1-1)*xmax)
 
 
 gen P`tip'lp_o`bw'_`xsh'=P`tip'lp_z`bw'_`xsh'
 replace P`tip'lp_o`bw'_`xsh'=P`tip'lp_u`bw'_`xsh' if P`tip'lp_z`bw'_`xsh'==.
 
 
 
 drop xwt* xsum*
 
 
 drop xrank xmax xmin xmed




 
}

*no crossing (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' 
`gtools'egen o0P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp

*no crossing and above (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' if P`tip'lp_`bw'_`xsh'==-1
`gtools'egen o1P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp

*no crossing and below (when looking at 99901) (these are national)
gen temp=P`tip'lp_`bw'_`xsh' if P`tip'lp_`bw'_`xsh'==1
`gtools'egen o3P`tip'lp_`bw'_`xsh'=count(temp),by(percentile decile)
drop temp


}




*clean up
foreach tip in 4355 5543  { 
drop P`tip'lp_`bw'_`xsh'
drop nP`tip'lp_`bw'_`xsh'
drop S`tip'lp_`bw'_`xsh'
}


}





foreach pvar of varlist P* {
local wvar=regexr("`pvar'","^P","W")
local rounder=regexr("`pvar'","^P","")
local rounder=regexr("`rounder'","lp.*","")
gen `wvar'=W`rounder'_lnmpcew - `pvar'
}

*these go backwards and I make negative
foreach rounds in  5543   { 
foreach zvar of varlist P`rounds'lp_* W`rounds'lp_* {  
replace `zvar'=-`zvar'
}
}

drop W*_lnmpcew
drop group

drop tag_md
`gtools'egen tag_md=tag(percentile decile market_id)

keep if tag_md==1
*keep if percentile<=9999
*so these are now at tag_md level, dataset of all the changes at each decile.

gen range=substr(string(percentile),-4,4)
gen stat=subinstr(string(percentile),range,"",1)
replace stat="mean" if stat==""
replace stat="no overlap reason" if stat=="9"
replace stat="monotonicity" if stat=="99"
drop percentile


renvars *`xsh', postsub(`xsh' XX )


reshape wide *XX ,  i(decile market_id stat) j(range) string

renvars *XX*, sub(XX )




*now make the long differences

local zbwlist "" 
qui ds  P4355lp_n* 
local stubs "`r(varlist)'"
tokenize `stubs'
forval n=1/20000 {
local `n'=regexr("``n''","^P4355lp_n","")
local zbwlist `zbwlist' ``n''
}



foreach zbw in  `zbwlist'  {

if regexm("`zbw'","m_9")==1 | regexm("`zbw'","n_9")==1  {
local mon=substr("`zbw'",-6,1)
*jsut average the percent that overlap across the two jumps
cap gen o`mon'P435055lp_`zbw'=(o`mon'P4350lp_`zbw'+o`mon'P5055lp_`zbw')/2
cap gen o`mon'P555043lp_`zbw'=(o`mon'P5043lp_`zbw'+o`mon'P5550lp_`zbw')/2
}



}



save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", replace




*now make a smaller clean file:
use "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", clear


if ""=="_extrapolate" {
local niner "9900"
local extrape "e"
cap drop *9999 
cap drop *9901
}
else {
local niner "9901"
local extrape "cen"
*first, different specification just showing long changes
}

cap drop *9505 
cap drop *9703
drop ?P*
drop ??P*
keep if stat=="mean"

foreach tip in 4355 5543  {  
gen W`tip'_lnmpcew=W`tip'lp_n`band'`extrape'_`niner'+P`tip'lp_n`band'`extrape'_`niner'
}
save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Short_${band_spec}_`xsh'_`DMI'_bs`bs'.dta", replace




restore

}




}
}

}


}








******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 4: Run exact corrections
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************


******************************************************************************
******************************************************************************
******************************************************************************





if "`bs'"=="0" {

*first get market charastceristics used here and below
winexec "${stataloc}" do "${codeloc}/market_characteristics.do"
sleep 100000

*call exact correction file. this takes a few mins to run
winexec "${stataloc}" do "${codeloc}/exact_correction_v`foldno'.do"
sleep 100000


}










******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 5: Get decile level aggregates

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************


******************************************************************************
******************************************************************************
******************************************************************************





***Now national paasche and laspeyres changes
 

foreach share  in $share_spec { 
foreach DMI in $Gi_spec { 
foreach band in $band_spec   { 

local cutoff "100"
local specT "ce"
local spec2 "o"
local spec "`specT'n"



*merge in conventional CPI price indices from Deaton

use "$output/CPI_district_X_decile_level_V4", clear
keep if sector=="Rural"

gen dist_wt=((_43_dist_wt*10) +_55_dist_wt)/2

*use filled in CPI as main CPI
drop ?cpi_??_??_decile
rename Lcpi_fill_43_55_decile Lcpi_43_55_decile
rename Pcpi_fill_43_55_decile Pcpi_43_55_decile



merge m:1 sector state43 district43 using  "$output/CPI_district_level_V4" , keepusing(L* P*) keep(match master) nogenerate
rename Lcpi_fill_43_55 Lcpi_demo_43_55 
rename Pcpi_fill_43_55 Pcpi_demo_43_55 



drop sector
gen sector=1
merge m:1 sector state43 district43 using  "$dropbox/replication_files/data/intermediate_data/R43R55/market_characteristics_43_55_rural.dta" , keepusing(r??_lmpce_n) keep(match master) nogenerate

gen decile=decile_dist*10

merge 1:1 sector state43 district43 decile using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Short_`band'_`share'_`DMI'_bs`bs'.dta" , keepusing(W*_lnmpcew P*lp_`spec2'wbw`band'`spec'_9901 )  keep(match master) nogenerate

gen stat="mean"
merge 1:1 sector state43 district43 decile stat using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Good_by_good_`band'_`share'_`DMI'_bs`bs'.dta" , keepusing(xP*lp_wbw`band'`spec'_9901 oP*lp_wbw`band'`spec'_9901)  keep(match master) nogenerate



merge 1:1 sector state43 district43 decile using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/bias_1st_order_5543.dta"  , keepusing(bias_full_demo_5543)  keep(match master) nogenerate

merge 1:1 sector state43 district43 decile using ///
"$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/bias_1st_order_4355.dta"  , keepusing(bias_full_demo_4355)  keep(match master) nogenerate




keep if r43_lmpce_n>=`cutoff'  & r55_lmpce_n>=`cutoff'  




sum Lcpi_43_55_decile if decile_dist==1,d
if r(max)>200 {
winsor Lcpi_43_55_decile, gen(Lcpi_43_55_decile_2) p(.01)
sum Lcpi_43_55_decile_2 if decile_dist==1,d
drop Lcpi_43_55_decile
rename Lcpi_43_55_decile_2 Lcpi_43_55_decile
}








foreach var of varlist  P4355lp_`spec2'wbw`band'*_9901   {  
gen `var'_f07=`var'-(0.3*bias_full_demo_4355)
}

foreach var of varlist  P5543lp_`spec2'wbw`band'*_9901   { 
gen `var'_f07=`var'-(0.3*bias_full_demo_5543)
}




foreach var of varlist W*_lnmpcew P*lp_`spec2'wbw`band'*_9901 P*lp_`spec2'wbw`band'*_9901_f07  { 
gen V`var'=exp(`var')
}

foreach var of varlist Lcpi_* Pcpi_* {
gen Z`var'=log(`var')
}




foreach initial in 43  55 { 
foreach final in 43   55 { 

if  "`initial'`final'"!="5055" & "`initial'`final'"!="5550" {

if `initial'!=`final' {
bysort decile_dist: egen ZY_`initial'_`final'_x = wtmean( W`initial'`final'_lnmpcew ), weight(dist_wt)
gen ZY_`initial'`final'_decile=100*(exp(ZY_`initial'_`final'_x)-1)
}

if `initial'!=`final' {

bysort decile_dist: egen ZE_`initial'_`final'_x = wtmean( P`initial'`final'lp_`spec2'wbw`band'`spec'_9901), weight(dist_wt)  // this is log change

gen ZE_`initial'`final'_decile=100*(exp(ZE_`initial'_`final'_x)-1)

gen WWZE_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZE_`initial'`final'_decile+100))-100

foreach sig in  "07"   { 

bysort decile_dist: egen ZE_`initial'_`final'_x_f`sig' = wtmean( P`initial'`final'lp_`spec2'wbw`band'`spec'_9901_f`sig'), weight(dist_wt)  // this is log change
gen ZE_`initial'`final'_decile_f`sig'=100*(exp(ZE_`initial'_`final'_x_f`sig')-1)
}

}


if `initial'<`final' {



bysort decile_dist: egen ZL_`initial'_`final'_x = wtmean( ZLcpi_`initial'_`final'_decile ), weight(dist_wt)
bysort decile_dist: egen ZP_`initial'_`final'_x = wtmean( ZPcpi_`initial'_`final'_decile ), weight(dist_wt)


gen ZL_`initial'`final'_decile=100*(exp(ZL_`initial'_`final'_x)-1)
gen ZP_`initial'`final'_decile=100*(exp(ZP_`initial'_`final'_x)-1)



bysort decile_dist: egen ZLcpi_demo_`initial'_`final'_x = wtmean( ZLcpi_demo_`initial'_`final'), weight(dist_wt)
bysort decile_dist: egen ZPcpi_demo_`initial'_`final'_x = wtmean( ZPcpi_demo_`initial'_`final'), weight(dist_wt)


gen ZL_demo_`initial'`final'_decile=100*(exp(ZLcpi_demo_`initial'_`final'_x)-1)
gen ZP_demo_`initial'`final'_decile=100*(exp(ZPcpi_demo_`initial'_`final'_x)-1)


gen WWZL_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZL_`initial'`final'_decile+100))-100
gen WWZP_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZP_`initial'`final'_decile+100))-100


}


}
}
}

egen tag=tag(decile)
keep if tag==1


save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Estimate_`band'_`share'_`DMI'_bs`bs'.dta", replace




}
}
}




**/


}
*this is the loop for bootstrap















****now bring CPI bootstraps
qui {

foreach band in   $band_spec       {  
foreach share in  $share_spec  {
foreach DMI in  $Gi_spec  {

local cutoff "100"


foreach nn of numlist 1/`bstotal' {


noi di "bs `nn'"

use "$output/bootstrap/CPI_district_X_decile_level_V4_bs`nn'", clear
keep if sector=="Rural"


gen dist_wt=((_43_dist_wt*10) +_55_dist_wt)/2

*use filled in CPI as main CPI
drop ?cpi_??_??_decile
rename Lcpi_fill_43_55_decile Lcpi_43_55_decile
rename Pcpi_fill_43_55_decile Pcpi_43_55_decile



merge m:1 sector state43 district43 using  "$output/bootstrap/CPI_district_level_V4_bs`nn'.dta" , keepusing(L* P*) keep(match master) nogenerate
rename Lcpi_fill_43_55 Lcpi_demo_43_55 
rename Pcpi_fill_43_55 Pcpi_demo_43_55 

drop sector
gen sector=1


gen decile=decile_dist*10

merge m:1 sector state43 district43 using  "$dropbox/replication_files/data/intermediate_data/R43R55/market_characteristics_43_55_rural.dta" , keepusing(r??_lmpce_n) keep(match master) nogenerate
keep if r43_lmpce_n>=`cutoff'  & r55_lmpce_n>=`cutoff'  

foreach var of varlist Lcpi_* Pcpi_* {
gen Z`var'=log(`var')
}



foreach initial in 43 55 {   
foreach final in 43  55 { 
if `initial'<`final' {

bysort decile_dist: egen ZL_`initial'_`final'_x = wtmean( ZLcpi_`initial'_`final'_decile ), weight(dist_wt)
bysort decile_dist: egen ZP_`initial'_`final'_x = wtmean( ZPcpi_`initial'_`final'_decile ), weight(dist_wt)

gen ZL_`initial'`final'_decile=100*(exp(ZL_`initial'_`final'_x)-1)
gen ZP_`initial'`final'_decile=100*(exp(ZP_`initial'_`final'_x)-1)

bysort decile_dist: egen ZLcpi_demo_`initial'_`final'_x = wtmean( ZLcpi_demo_`initial'_`final'), weight(dist_wt)

bysort decile_dist: egen ZPcpi_demo_`initial'_`final'_x = wtmean( ZPcpi_demo_`initial'_`final'), weight(dist_wt)



gen ZL_demo_`initial'`final'_decile=100*(exp(ZLcpi_demo_`initial'_`final'_x)-1)
gen ZP_demo_`initial'`final'_decile=100*(exp(ZPcpi_demo_`initial'_`final'_x)-1)


}
}
}



egen tag=tag(decile)
keep if tag==1
keep ZL_*_decile ZP_*_decile  decile
gen bs=`nn'
merge 1:1 decile using "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Estimate_`band'_`share'_`DMI'_bs`nn'.dta", nogenerate keepusing (ZY_*_decile ZE_*_decile  WWZE_*_decile)

foreach initial in 43 55 { 
foreach final in 43  55 { 
if `initial'<`final' {


gen WWZL_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZL_`initial'`final'_decile+100))-100
gen WWZP_`initial'`final'_decile=(100*(ZY_`initial'`final'_decile+100)/(ZP_`initial'`final'_decile+100))-100


}
}
}


save "${dump}/Estimate_short_CPI_`band'_`share'_`DMI'_bs`nn'.dta", replace




}






clear 



use "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Estimate_`band'_`share'_`DMI'_bs0.dta"
renvars ZE*  WWZE* ZY* ZL* ZP*  WWZL* WWZP*, prefix(X)


foreach nn of numlist 1/`bstotal' {
cap append using "${dump}/Estimate_short_CPI_`band'_`share'_`DMI'_bs`nn'.dta"
cap erase "${dump}/Estimate_short_CPI_`band'_`share'_`DMI'_bs`nn'.dta"
}






foreach thing in ZE_5543 ZE_4355  WWZE_5543 WWZE_4355 ZY_5543 ZY_4355 ZL_4355 ZP_4355   WWZL_4355 WWZP_4355 ZL_demo_4355 ZP_demo_4355 {

egen `thing'_decile_pctile975=pctile(`thing'_decile), by(decile) p(97.5)
egen `thing'_decile_pctile025=pctile(`thing'_decile), by(decile) p(2.5)

gen `thing'_decile_pctile975_v2=2*X`thing'_decile-`thing'_decile_pctile025
gen `thing'_decile_pctile025_v2=2*X`thing'_decile-`thing'_decile_pctile975

}

egen tagz=tag(decile)
keep if tagz==1


keep decile *_pctile975* *_pctile025*
save "$dropbox/replication_files/data/intermediate_data/Engel_curves/Engel_V`foldno'/Estimate_short_CPI_`band'_`share'_`DMI'_bs_combo.dta", replace




}
}
}


}






******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

*Block 6: Make figures

******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************


******************************************************************************
******************************************************************************
******************************************************************************





winexec "${stataloc}" do "${codeloc}/figures_v`foldno'.do"
sleep 10000

winexec "${stataloc}" do "${codeloc}/manyG_groups.do"


















